#manual branding - file won't load
transcend_cols = c("#1A4C81","#59C3B4","#EF464B","#ADE0EE")
transcend_cols2 = c("#BC2582","#FFA630","#FFDE42","#99C24D","#218380","#D3B7D7")
transcend_grays = c("#4D4D4F","#9D9FA2","#D1D3D4")
transcend_na = transcend_grays[2]
theme_transcend = theme_gdocs(base_size = 14, base_family = "Open Sans") +
theme(
plot.title = element_text(family = "Bebas Neue", color = "black"),
plot.background = element_blank(),
axis.text = element_text(colour = "black"),
axis.title = element_text(colour = "black"),
panel.border = element_rect(colour = "#4D4D4F"),
strip.text = element_text(size = rel(0.8)),
plot.margin = margin(10, 24, 10, 10, "pt")
)
theme_set(theme_transcend)What do schools most want to pilot in the next 5 years?
tags %>%
select(school_id, starts_with("pilot")) %>%
pivot_longer(cols = starts_with("pilot"),
names_to = "variable",
values_to = "usage") %>%
group_by(variable) %>%
summarize(n = sum(usage),
pct = round(n/189, 2)) %>%
mutate(variable = str_replace_all(variable, "pilot", "practices")) %>%
left_join(labels, by = "variable") %>%
arrange(-n) %>%
slice_head(n = 5) %>%
ggplot(., aes(pct, reorder(label, pct))) +
geom_col(fill = transcend_cols[1]) +
scale_x_continuous(expand = c(0, 0), limits = c(0, .2), labels = scales::percent_format()) +
scale_y_discrete(labels = wrap_format(25)) +
geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)),
hjust = 1.1,
vjust = 0,
color = "white",
fontface = "bold",
size = 5.5,
family = "sans") +
theme(panel.grid.major.y = element_blank()) +
labs(x = "",
y = "",
title = str_wrap("Top 5 tags Canopy schools hope to pilot in the next 5 years", 60))Are there broad patterns in the practices schools hope to implement in the next five years?
#read in cluster information
cluster <- import(here("data", "mixed-methods-clusters-24.csv")) %>%
janitor::clean_names() %>%
select(tag, cluster = proposed_cluster_for_preliminary_24_analysis) %>%
mutate(cluster = case_when(
cluster == "Postsecondary" ~ "Postsecondary pathways",
cluster == "Ed justice" ~ "Educational justice",
cluster == "Individualized" ~ "Individualized learning",
TRUE ~ as.character(cluster)
))
#link to pilot tags
pilot_clusters <- tags %>%
select(school_id, starts_with("pilot")) %>%
pivot_longer(cols = starts_with("pilot"),
names_to = "variable",
values_to = "usage") %>%
mutate(variable = str_replace_all(variable, "pilot", "practices")) %>%
left_join(labels, by = "variable") %>%
select(school_id, tag = label, usage) %>%
left_join(cluster, by = "tag")
#cluster totals
tots <- pilot_clusters %>%
select(tag, cluster) %>%
unique() %>%
mutate(rate = 1) %>%
group_by(cluster) %>%
summarize(total = sum(rate))
#weighted total for each school
pilot_clusters %>%
group_by(school_id, cluster) %>%
summarize(n = sum(usage)) %>%
ungroup() %>%
left_join(tots, by = "cluster") %>%
mutate(pct = n/total) %>%
group_by(cluster) %>%
summarize(wt_sum = sum(pct),
wt_mean = mean(pct),
mean = mean(n),
median = median(n)) %>%
ggplot(., aes(reorder(cluster, -wt_mean), wt_mean)) +
geom_col(fill = transcend_cols[1]) +
scale_y_continuous(expand = c(0, 0), limits = c(0, .1), labels = scales::percent_format()) +
scale_x_discrete(labels = wrap_format(15)) +
theme(panel.grid.major.x = element_blank()) +
labs(y = "Average cluster percentage selected",
x = "",
title = str_wrap("Clusters Canopy schools are interested in piloting in the next 5 years", 60))pilot_clusters %>%
filter(usage == 1) %>%
group_by(school_id, cluster) %>%
summarize(n = sum(usage)) %>%
ungroup() %>%
left_join(tots, by = "cluster") %>%
mutate(pct = n/total) %>%
group_by(cluster) %>%
summarize(wt_sum = sum(pct),
wt_mean = mean(pct),
mean = mean(n),
median = median(n)) %>%
ggplot(., aes(reorder(cluster, -mean), mean)) +
geom_col(fill = transcend_cols[1]) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 2)) +
scale_x_discrete(labels = wrap_format(15)) +
geom_text(aes(label = round(mean, 2)),
vjust = -.2,
color = transcend_na,
fontface = "bold",
size = 5.5,
family = "sans") +
theme(panel.grid.major.x = element_blank()) +
labs(y = "Mean tags selected",
x = "",
title = str_wrap("Clusters Canopy schools are interested in piloting in the next 5 years", 60))Modifying the graph to include the number of tags in each cluster schools already use:
tags <- full %>%
select(school_id, starts_with("practices")) %>%
pivot_longer(cols = starts_with("practices"),
names_to = "variable",
values_to = "usage") %>%
left_join(labels, by = "variable") %>%
rename("tag" = label) %>%
left_join(cluster, by = "tag") %>%
group_by(school_id, cluster) %>%
summarize(n = sum(usage)) %>%
group_by(cluster) %>%
summarize(mean = mean(n)) %>%
mutate(type = "Practices in use")
pilot_clusters %>%
filter(usage == 1) %>%
group_by(school_id, cluster) %>%
summarize(n = sum(usage)) %>%
group_by(cluster) %>%
summarize(mean = mean(n)) %>%
mutate(type = "Practices to pilot") %>%
bind_rows(tags) %>%
ggplot(., aes(reorder(cluster, -mean), mean, fill = type)) +
geom_bar(stat = "identity") +
scale_fill_manual(values = transcend_cols) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 15)) +
scale_x_discrete(labels = wrap_format(15)) +
geom_text(aes(label = round(mean, 2)),
position = position_stack(vjust = .5),
color = "white",
fontface = "bold",
size = 5.5,
family = "sans") +
theme(panel.grid.major.x = element_blank(),
legend.position = c(.8,.9)) +
labs(y = "Mean tags selected",
x = "",
title = str_wrap("Clusters Canopy schools are interested in piloting in the next 5 years", 60),
fill = "")Alternate version, using facet instead of stacking the
bars:
pilot_clusters %>%
filter(usage == 1) %>%
group_by(school_id, cluster) %>%
summarize(n = sum(usage)) %>%
group_by(cluster) %>%
summarize(mean = mean(n)) %>%
mutate(type = "Avg. number of pilot practices selected") %>%
bind_rows(tags) %>%
mutate(type = ifelse(type == "Practices in use", "Avg. number of practices already in use", as.character(type))) %>%
ggplot(., aes(mean, reorder(cluster, -mean))) +
geom_bar(stat = "identity", fill = transcend_cols[1]) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 15)) +
scale_y_discrete(labels = wrap_format(15)) +
geom_text(aes(label = round(mean, 2)),
vjust = .5,
hjust = -0.1,
color = transcend_na,
fontface = "bold",
size = 5.5,
family = "sans") +
theme(panel.grid.major.y = element_blank()) +
labs(x = "",
y = "",
title = str_wrap("What Canopy schools are interested in piloting in the next 5 years", 66),
fill = "") +
facet_wrap(~type)Exploring the graph above once more, using number of schools as the denominator rather than number of tags:
pilot_clusters %>%
group_by(school_id, cluster) %>%
summarize(n = sum(usage)) %>%
mutate(selection = ifelse(n > 0, 1, 0)) %>%
group_by(cluster) %>%
summarize(pct = sum(selection)/189) %>%
ggplot(., aes(reorder(cluster, -pct), pct)) +
geom_col(fill = transcend_cols[1]) +
scale_y_continuous(expand = c(0, 0), limits = c(0, 1), labels = scales::percent_format()) +
scale_x_discrete(labels = wrap_format(15)) +
theme(panel.grid.major.x = element_blank()) +
geom_text(aes(label = scales::label_percent(accuracy = 1)(pct)),
nudge_y = 0.01,
vjust = 0,
color = transcend_na,
fontface = "bold",
size = 5.5,
family = "sans") +
labs(y = "Average cluster percentage selected",
x = "",
title = str_wrap("Clusters Canopy schools are interested in piloting in the next 5 years", 60))When we’re dividing pilot tags into clusters, which tags are selected the most?
pilot_clusters %>%
group_by(cluster, tag) %>%
summarize(n = sum(usage, na.rm = TRUE)) %>%
ungroup() %>%
group_by(cluster) %>%
arrange(-n) %>%
slice_head(n = 5) %>%
ggplot(., aes(n, reorder(tag, n))) +
geom_col(fill = transcend_cols[1]) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 32)) +
scale_y_discrete(labels = wrap_format(20)) +
theme(panel.grid.major.y = element_blank(),
axis.text.y = element_text(size = 10)) +
geom_text(aes(label = n),
vjust = .5,
hjust = -0.1,
color = transcend_na,
fontface = "bold",
size = 4.5,
family = "sans") +
facet_wrap(~cluster, scales = "free_y", labeller = label_wrap_gen(width = 20)) +
labs(y = "",
x = "",
title = str_wrap("What Canopy schools are most interested in piloting in the next 5 years", 75))When we’re dividing pilot tags into clusters, which tags are selected the least?
pilot_clusters %>%
group_by(cluster, tag) %>%
summarize(n = sum(usage, na.rm = TRUE)) %>%
ungroup() %>%
group_by(cluster) %>%
arrange(-n) %>%
slice_tail(n = 5) %>%
ggplot(., aes(n, reorder(tag, n))) +
geom_col(fill = transcend_cols[1]) +
scale_x_continuous(expand = c(0, 0), limits = c(0, 32)) +
scale_y_discrete(labels = wrap_format(20)) +
theme(panel.grid.major.y = element_blank(),
axis.text.y = element_text(size = 10)) +
geom_text(aes(label = n),
vjust = .5,
hjust = -0.1,
color = transcend_na,
fontface = "bold",
size = 4.5,
family = "sans") +
facet_wrap(~cluster, scales = "free_y", labeller = label_wrap_gen(width = 20)) +
labs(y = "",
x = "",
title = str_wrap("What Canopy schools are most interested in piloting in the next 5 years", 75))To what extent do school characteristics play a role in the tags they want to pilot?
Quick defs:
-Binary high/low = High if more than half the tags in that cluster are
in use
-Tri high/average/low = High if mean+SD; low if mean-SD, average within
1 SD
#create cluster score
cluster_score <- full %>%
select(school_id, starts_with("practices")) %>%
pivot_longer(cols = starts_with("practices"),
names_to = "variable",
values_to = "usage") %>%
left_join(labels, by = "variable") %>%
rename("tag" = label) %>%
left_join(cluster, by = "tag") %>%
group_by(school_id, cluster) %>%
summarize(n = sum(usage)) %>%
left_join(tots, by = "cluster") %>%
mutate(pct = n/total,
mean = mean(pct),
sd = sd(pct),
engagement_bi = case_when(
pct >= 0.5 ~ "High",
TRUE ~ "Low"
),
engagement_tri = case_when(
pct > mean + sd ~ "High",
pct < mean - sd ~ "Low",
TRUE ~ "Average"
)) %>%
#add outcome
left_join(pilot_clusters, by = c("school_id", "cluster")) %>%
group_by(school_id, cluster) %>%
mutate(pilot_score = sum(usage),
pilot_score = ifelse(pilot_score >= 1, 1, 0)) %>%
select(-c(usage,tag)) %>%
unique()
#prep base data
mod_dat <- full %>%
select(school_id, school_locale, school_type, grades_pk, grades_elementary, grades_middle, grades_high, school_enrollment, pct_bipoc, pct_ell, pct_frpl, pct_swd, leadership_diversity) %>%
mutate(leadership_diversity = gsub("people", "leaders", leadership_diversity),
school_locale = factor(school_locale, levels = c("Urban", "Suburban", "Rural", "Multiple")),
school_type = factor(school_type, levels = c("Public district school", "Public charter school", "Independent (private) school")),
leadership_diversity = factor(leadership_diversity, levels = c("0 - 24% leaders of color", "25 - 49% leaders of color", "50 - 74% leaders of color", "75 - 100% leaders of color")),
school_enrollment = as.numeric(scale(school_enrollment, center = TRUE, scale = TRUE))) %>%
mutate(across(starts_with("pct"), ~as.numeric(scale(., center = TRUE, scale = TRUE)))) %>%
drop_na()
# model function
log_model <- function(outcome, data, title){ #outcome needs to be dummy
#model
mod <- glm(as.formula(paste(outcome, "~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + leadership_diversity + engagement_tri")),
family = "binomial",
data = data)
# set labels
cov_labels <- c(
"school_typeIndependent (private) school" = "Independent (private) school",
"school_typePublic charter school" = "Public charter school",
"grades_pk" = "PreK",
"grades_elementary" = "Elementary",
"grades_middle" = "Middle",
"grades_high" = "High",
"school_enrollment" = "School Enrollment",
"pct_bipoc" = "% BIPOC students",
"pct_ell" = "% EL-designated students",
"pct_frpl" = "% FRPL-eligible",
"pct_swd" = "% Students with disabilities",
"school_localeMultiple" = "Multiple locales",
"school_localeSuburban" = "Suburban",
"school_localeRural" = "Rural",
"leadership_diversity25 - 49% leaders of color" = "25-49% leaders of color",
"leadership_diversity50 - 74% leaders of color" = "50-74% leaders of color",
"leadership_diversity75 - 100% leaders of color" = "75-100% leaders of color",
"engagement_triHigh" = "High engagement with cluster",
"engagement_triAverage" = "Average engagement with cluster",
"engagement_triLow" = "Low engagement with cluster"
)
#plot
plot <- tidy(mod, effects = "ran_pars", conf.int = TRUE) %>%
filter(term != "(Intercept)") %>%
mutate(exp_est = exp(estimate),
exp_min = exp(estimate - std.error),
exp_max = exp(estimate + std.error)) %>%
mutate(term = cov_labels[term]) %>%
ggplot(., aes(y = fct_reorder(term, exp_est), x = exp_est)) +
geom_linerange(aes(xmin = exp_min,
xmax = exp_max),
color = "blue") +
geom_point() +
geom_vline(xintercept = 1) +
scale_x_continuous(
trans = "log",
breaks = c(.0625, .25, .5, 1, 2, 4, 16),
labels = str_wrap(c("1/16 as likely", "1/4 as likely", "1/2 as likely", "Even", "2x as likely", "4x as likely", "16x as likely"), 10),
expand = expansion(0, 0)
) +
theme_transcend +
theme(panel.grid.major.y = element_blank()) +
labs(
x = "",
y = "",
title = str_wrap(title, 60))
return(plot)}Postsecondary pathways
Starting with Postsecondary pathways to see if we get expected results (high schools more likely to adopt).
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Postsecondary pathways")
log_model("pilot_score", dat, "School characteristics predicting piloting 1 or more tags related to Postsecondary Pathways")Educational Justice
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Educational justice")
log_model("pilot_score", dat, "School characteristics predicting piloting 1 or more tags related to Educational Justice")Deeper Learning
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Deeper learning")
log_model("pilot_score", dat, "School characteristics predicting piloting 1 or more tags related to Deeper Learning")Individualized Learning
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Individualized learning")
log_model("pilot_score", dat, "School characteristics predicting piloting 1 or more tags related to Individualized Learning")Increasing access & supports
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Increasing access & supports")
log_model("pilot_score", dat, "School characteristics predicting piloting 1 or more tags related to Increasing access & supports")Student-driven learning
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Student-driven learning")
log_model("pilot_score", dat, "School characteristics predicting piloting 1 or more tags related to Student-driven learning")ATTEMPT 2: USING GAM MODELS
The set of models below do not assume that the relationship between our school characteristics and tags is linear, allowing for a more complicated relationship between engagement with a cluster and the pilot tags selected (i.e., deals with some of the issues that may come from ceiling effects, or whatnot).
Deeper learning
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Deeper learning")
gam_model <- gam(pilot_score ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + leadership_diversity + s(pct),
data = dat)
#note to self: wrapping in s() stands for smoothing - does not assume linear relationship and instead allows model to take on the shape that makes the most sense
plot(gam_model)Educational justice
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Educational justice")
gam_model <- gam(pilot_score ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + leadership_diversity + s(pct),
data = dat)
plot(gam_model)Individualized learning
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Individualized learning")
gam_model <- gam(pilot_score ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + leadership_diversity + s(pct),
data = dat)
plot(gam_model)Postsecondary pathways
dat <- mod_dat %>%
left_join(cluster_score, by = "school_id") %>%
filter(cluster == "Postsecondary pathways")
gam_model <- gam(pilot_score ~ school_locale + school_type + grades_pk + grades_elementary + grades_middle + grades_high + school_enrollment + pct_bipoc + pct_ell + pct_frpl + pct_swd + leadership_diversity + s(pct),
data = dat)
plot(gam_model)Increasing access & supports
Error: A term has fewer unique covariate combinations than specified maximum degrees of freedom
Student-driven learning
Error: A term has fewer unique covariate combinations than specified maximum degrees of freedom